# Dickstein, Ho, and Mark (2023)
# This code creates a household-choice level dataset for all insured and uninsured households.

# * # * # * # * # * # * #
# PRELIMINARIES         #
# * # * # * # * # * # * #

# Every relative path used below (the sourced helper file, the input CSVs and
# the output CSV) resolves against this directory. The path is itself relative,
# so the script has to be started from a directory whose parent contains
# "library".
setwd("../library")
# Runs the shared setup script. No library() call or helper definition is
# visible in this file, so fread(), `:=` and is.identified() used below
# presumably come from here -- TODO confirm against PreliminariesCode.R.
source("PreliminariesCode.R")

# * # * # * # * # * # * #
# LOADING DATA          #
# * # * # * # * # * # * #

## Loading All Subscribers (including uninsured) dataset
SubDat <- fread("finalsubsdata.csv")

# Adding household id. The id is simply the row number of the loaded file, so
# it identifies a household only if the file holds one row per household(-year)
# -- NOTE(review): confirm against the upstream file. seq_len() is used rather
# than 1:.N so that an empty file gives an empty id column instead of c(1, 0).
SubDat[, hhid := seq_len(.N)]
# Log how many rows were read.
print(paste0("There are ", nrow(SubDat), " subscriber observations when loaded."))

## Loading All Choice Premiums:
# Only these columns are read from the premium file.
vars_to_keep_prems <- c(
  "subscriberid", "year", "constructed_plan_year",
  "PAYER_ID", "MNC",
  "grossprem", "best_guess_Subsidy",
  "best_guess_AV", "plan_AV", "best_guess_netprem"
)
all_choice_premiums <- fread(
  "AllChoicePremiums.csv",
  select = vars_to_keep_prems
)

### Keeping just 2014-2016 and just Bronze, Silver, Gold.
### The 8th character of constructed_plan_year is used as the tier code, with
### codes 2-4 being the tiers kept (substr() returns text, hence the quoted
### codes). Both conditions use %in%, so rows with a missing year or plan id
### are dropped rather than producing NA.
all_choice_premiums <- all_choice_premiums[
  year %in% 2014:2016 &
    substr(constructed_plan_year, 8, 8) %in% c("2", "3", "4")
]

### Presumably checks that (subscriberid, constructed_plan_year) uniquely
### identifies a row. NOTE(review): is.identified() is defined outside this
### file; whether it stops or only reports on duplicates should be confirmed.
is.identified(all_choice_premiums, c("subscriberid", "constructed_plan_year"))

# * # * # * # * # * # * # * # * # * # * # * # * # * # * #
# CREATING OUTSIDE OPTION FOR EACH SUBSCRIBER - YEAR    #
# * # * # * # * # * # * # * # * # * # * # * # * # * # * #
# One synthetic "Outside_Option" row for every subscriber-year that appears in
# the premium data, in order of first appearance.
outsideoptions <- unique(
  all_choice_premiums[, c("subscriberid", "year"), with = FALSE]
)
outsideoptions[, constructed_plan_year := "Outside_Option"]
# Every remaining premium/plan attribute of the outside option is set to 0.
outsideoptions[
  ,
  (setdiff(
    vars_to_keep_prems,
    c("subscriberid", "year", "constructed_plan_year")
  )) := 0
]

# Appending the outside option to each subscriber-year's choice set.
# rbind() on data.tables lines the columns up by name.
all_choice_premiums <- rbind(all_choice_premiums, outsideoptions)

# * # * # * #
# MERGING   #
# * # * # * #
# Left join of subscribers onto their choice sets: each subscriber row is
# repeated once per option available for its subscriberid-year (hence
# allow.cartesian). all.x keeps subscribers with no premium rows; for those the
# premium-side columns, including constructed_plan_year, are NA. Columns present
# in both inputs get the ".chosen" / ".option" suffixes.
ExplodedDat <- merge(
  SubDat,
  all_choice_premiums,
  by = c("subscriberid", "year"),
  allow.cartesian = TRUE,
  all.x = TRUE,
  suffixes = c(".chosen", ".option"))
# choice is 1 on the row whose option equals the subscriber's chosen plan, 0 on
# the other rows, and NA where either side of the comparison is NA.
ExplodedDat[, choice := as.numeric(chosen_plan_id == constructed_plan_year)]

# * # * # * #
# KONDO!    #
# * # * # * #

# removing M0099
# Reporting how many subscribers chose an option of payer M0099. Rows left
# unmatched by the all.x merge carry NA in both choice and PAYER_ID; na.rm keeps
# them from turning the reported count into NA.
print(paste0(
  sum(
    ExplodedDat$choice * as.numeric(ExplodedDat$PAYER_ID == "M0099"),
    na.rm = TRUE
  ),
  " subscribers chose PAYER_99"))
# Dropping every option row of that payer. data.table treats an NA in a logical
# `i` as FALSE, so rows with a missing PAYER_ID are dropped here as well.
ExplodedDat <- ExplodedDat[PAYER_ID != "M0099"]

# removing subscribers without a choice:
# nchoice = number of remaining option rows flagged as chosen, per household.
ExplodedDat[, nchoice := sum(choice), by = hhid]
# One row per household carrying that count; used for the log lines below.
SubWChoice <- unique(ExplodedDat[, .(hhid, nchoice)])

# Reporting how many households fall outside the "exactly one choice" case.
print(paste0("There are ", SubWChoice[, sum(nchoice == 0)], " subscribers without a choice"))
print(paste0("There are ", SubWChoice[, sum(nchoice > 1)], " subscribers with more than one choice"))
print(paste0("We remove ", SubWChoice[, sum(nchoice != 1)], " subscriber who choose an unavailable option."))

# Keep only those with one choice:
ExplodedDat <- ExplodedDat[nchoice == 1]

# Removing options that no one chooses:
# nopchoice = number of households choosing each plan, stored on every row.
ExplodedDat[, nopchoice := sum(choice), by = "constructed_plan_year"]
# One row per plan with the same count, used for the log lines.
OpWChoice <- ExplodedDat[, .(nchoice = sum(choice)), by = "constructed_plan_year"]
print(paste0("There are ", sum(OpWChoice$nchoice == 0) , " plans that are not chosen"))
# A plan counts as chosen when at least one household picks it, matching the
# nopchoice > 0 filter below (a plan chosen exactly once is still chosen).
print(paste0("There are ", sum(OpWChoice$nchoice > 0) , " plans that are chosen"))

# Keep only the option rows of plans with a positive market share:
ExplodedDat <- ExplodedDat[nopchoice > 0]

# Final cleaning
# Dropping the two helper counters created above together with columns that are
# not carried into the output file.
ExplodedDat[, c("nchoice", "nopchoice", "markettype", "payer_id", "best_guess_metal", "mnc_plantype") := NULL]
# Renaming by reference with setnames() instead of `names<-`, which would
# rebind ExplodedDat to a copy. The guard keeps a missing fpl_bins column a
# silent no-op.
if ("fpl_bins" %in% names(ExplodedDat)) {
  setnames(ExplodedDat, "fpl_bins", "inc_over_fpl_bins")
}

# * # * # * #
# SAVING    #
# * # * # * #
# Writing the household-by-option dataset into the current working directory
# (set by setwd() at the top of the script). FALSE is spelled out because F is
# an ordinary variable that can be reassigned.
write.csv(ExplodedDat, file = "explodeddata.csv", row.names = FALSE)
